*******		REPLICATION FILES
*******		Climate Variability and Irregular Migration to the European Union 
*******		Global Environmental Change 
*******		Fabien Cottier and Idean Salehyan
*******		Replication: Figure 3 (Models 1, Table 1), main text
*******		This version: April 2021

* note: stata packages required for running models
* - coefplot
* - crossfold
* - plottig

clear all

set more off
set scheme s1mono
cd "path/to/replication/directory"

* load data
use "data_quarter.dta", clear 

* duplicates data
duplicates list cowc year quarter
duplicates tag cowc year quarter, gen(_dupl)
duplicates drop cowc year quarter, force

* set time series
sort cowc year quarter
gen quarter_int = real(regexs(1)) if regexm(quarter,"([0-9]+)")
gen quarterS=(year-2005)*4+quarter_int
order cowc nationality year quarter quarter_int quarterS
tsset cowc quarterS
 
* gen additional variables
encode continent, gen(contN)
recode contN (5=.)

* generate rate migration variable
gen Rmigrq_exbalk=(nmigrq/pop)*(10^5)
gen Rmigrq_exbalk_ln=log(Rmigrq_exbalk+1)

* generate lag quartely migration variables
by cowc: gen nmigrq_exbalk_ln_1tl = nmigrq_exbalk_ln[_n-1]
by cowc: gen nmigrq_exbalk_ln_2tl = nmigrq_exbalk_ln[_n-2]
by cowc: gen nmigrq_exbalk_ln_3tl = nmigrq_exbalk_ln[_n-3]
by cowc: gen nmigrq_exbalk_ln_4tl = nmigrq_exbalk_ln[_n-4]

by cowc: gen Rmigrq_exbalk_ln_1tl = Rmigrq_exbalk_ln[_n-1]
by cowc: gen Rmigrq_exbalk_ln_2tl = Rmigrq_exbalk_ln[_n-2]
by cowc: gen Rmigrq_exbalk_ln_3tl = Rmigrq_exbalk_ln[_n-3]
by cowc: gen Rmigrq_exbalk_ln_4tl = Rmigrq_exbalk_ln[_n-4]


* gen dummies variables for sample splitting: low agriculture / high agriculture
qui xtreg nmigrq_exbalk_ln nmigrq_exbalk_ln_* i.year if migr100_exbalk==1, ///
 fe vce(cluster cowc)

sum agriLab if e(sample) & year==2010, d
scalar agriMed=r(p50)
gen agri_H=0 if e(sample) & year==2010
recode agri_H (0=1) if agriLab-agriMed > 0 & year==2010
by cowc: replace agri_H = agri_H[21] if missing(agri_H)


* generate scalars for post-estimation analysis
qui xtreg nmigrq_exbalk_ln nmigrq_exbalk_ln_* wmean_speiy /// 
 i.year i.quarter_int if migr100_exbalk==1, ///
 fe vce(cluster cowc)

sum wmean_speiy if e(sample)
scalar wspeiy_sd = r(sd)

sum wmean_speiq if e(sample)
scalar wspeiq_sd = r(sd)

sum wmean_spei_gs if e(sample)
scalar wspei_gs_sd = r(sd)

sum mean_speiy if e(sample)
scalar speiy_sd = r(sd)

sum stdw10ma_temp if e(sample)
scalar tempy_sd = r(sd)

sum stdw10ma_precip if e(sample)
scalar precipy_sd = r(sd)

qui xtreg nmigrq_exbalk_ln nmigrq_exbalk_ln_* i.year wmean_speiy, ///
 fe vce(cluster cowc)
sum wmean_speiy if e(sample)
scalar wspeiy_sd_all = r(sd)



****************************** figure 3 ************************


* for detailed results, see do files for the sensitivity analyses


* estimate models from sensitivity analyses

* main models
xtreg nmigrq_exbalk_ln nmigrq_exbalk_ln_* wmean_speiy ///
    i.year i.quarter_int if migr100_exbalk==1, ///
    fe vce(cluster cowc)
est sto tab1_m1
global df_tab1_m1=e(df_r)

nlcom _b[wmean_speiy]*wspeiy_sd*-1, post df($df_tab1_m1) 
est sto tab1_m1_neg
est resto tab1_m1
nlcom _b[wmean_speiy]*wspeiy_sd, post df($df_tab1_m1) 
est sto tab1_m1_pos

* S1 growing season
xtreg nmigrq_exbalk_ln nmigrq_exbalk_ln_* wmean_spei_gs ///
    i.year i.quarter_int if migr100_exbalk==1, ///
    fe vce(cluster cowc)
est sto S1_m1
global df_S1_m1=e(df_r)

nlcom _b[wmean_spei_gs]*wspei_gs_sd*-1, post df($df_S1_m1) 
est sto S1_m1_neg
est resto S1_m1
nlcom _b[wmean_spei_gs]*wspei_gs_sd, post df($df_S1_m1) 
est sto S1_m1_pos

* S2 rate variable
xtreg Rmigrq_exbalk_ln Rmigrq_exbalk_ln_* wmean_speiy ///
    i.year i.quarter_int if migr100_exbalk==1, ///
    fe vce(cluster cowc)
est sto S2_m1
global df_S2_m1=e(df_r)

nlcom _b[wmean_speiy]*wspeiy_sd*-1, post df($df_S2_m1) 
est sto S2_m1_neg
est resto S2_m1
nlcom _b[wmean_speiy]*wspeiy_sd, post df($df_S2_m1) 
est sto S2_m1_pos

* S3 quasi poisson
xtpois nmigrq_exbalk nmigrq_exbalk_ln_* wmean_speiy ///
    i.year i.quarter_int if migr100_exbalk==1, ///
    fe vce(robust)
est sto S3_m1

nlcom _b[wmean_speiy]*wspeiy_sd*-1, post
est sto S3_m1_neg
est resto S3_m1
nlcom _b[wmean_speiy]*wspeiy_sd, post
est sto S3_m1_pos

* S4 quarterly SPEI data
xtreg nmigrq_exbalk_ln nmigrq_exbalk_ln_* wmean_speiq L1.wmean_speiq ///
    L2.wmean_speiq L3.wmean_speiq ///
    i.year i.quarter_int if migr100_exbalk==1, ///
    fe vce(cluster cowc)
est sto S4_m1
global df_S4_m1=e(df_r)

nlcom _b[wmean_speiq]*wspeiq_sd*-1+ ///
    _b[L1.wmean_speiq]*wspeiq_sd*-1+ ///
    _b[L2.wmean_speiq]*wspeiq_sd*-1+ ///
    _b[L3.wmean_speiq]*wspeiq_sd*-1, post df($df_S4_m1) 
est sto S4_m1_neg
est resto S4_m1
nlcom _b[wmean_speiq]*wspeiq_sd+ ///
    _b[L1.wmean_speiq]*wspeiq_sd+ ///
    _b[L2.wmean_speiq]*wspeiq_sd+ ///
    _b[L3.wmean_speiq]*wspeiq_sd, post df($df_S4_m1)
est sto S4_m1_pos


* S5 annual data

preserve

use "data_annual.dta", clear

* duplicates data
duplicates tag cowc year , gen(_dupl)
duplicates drop cowc year , force

* set time series
order cowc nationality year
tsset cowc year

* generate lag quartely migration variables
by cowc: gen nmigry_exbalk_ln_1tl = nmigry_exbalk_ln[_n-1]

* gen dummies variables for sample splitting
qui xtreg nmigry_exbalk_ln nmigry_exbalk_ln_1tl wmean_speiy i.year if migr100_exbalk==1, ///
 fe vce(cluster cowc)
sum wmean_speiy if e(sample)
scalar wmean_speiy_annual = r(sd)

* estimate model
xtreg nmigry_exbalk_ln nmigry_exbalk_ln_1tl wmean_speiy ///
    i.year if migr100_exbalk==1, ///
    fe vce(cluster cowc)
est sto S5_m1
global df_S5_m1=e(df_r)

nlcom _b[wmean_speiy]*wspeiy_sd*-1, post df($df_S5_m1)
est sto S5_m1_neg
est resto S5_m1
nlcom _b[wmean_speiy]*wspeiy_sd, post df($df_S5_m1)
est sto S5_m1_pos

restore

* S6 All countries
xtreg nmigrq_exbalk_ln nmigrq_exbalk_ln_* wmean_speiy ///
    i.year i.quarter_int, ///
    fe vce(cluster cowc)
est sto S6_m1
global df_S6_m1=e(df_r)

nlcom _b[wmean_speiy]*wspeiy_sd_all*-1, post df($df_S6_m1) 
est sto S6_m1_neg
est resto S6_m1
nlcom _b[wmean_speiy]*wspeiy_sd_all, post df($df_S6_m1) 
est sto S6_m1_pos


* S7 lag migration variable
xtreg nmigrq_exbalk_ln wmean_speiy ///
    i.year i.quarter_int if migr100_exbalk==1, ///
    fe vce(cluster cowc)
est sto S7_m1
global df_S7_m1=e(df_r)

nlcom _b[wmean_speiy]*wspeiy_sd*-1, post df($df_S7_m1) 
est sto S7_m1_neg
est resto S7_m1
nlcom _b[wmean_speiy]*wspeiy_sd, post df($df_S7_m1) 
est sto S7_m1_pos


* S8 spei with non population weights
xtreg nmigrq_exbalk_ln nmigrq_exbalk_ln_* mean_speiy ///
    i.year i.quarter_int if migr100_exbalk==1, ///
    fe vce(cluster cowc)
est sto S8_m1
global df_S8_m1=e(df_r)

nlcom _b[mean_speiy]*speiy_sd*-1, post df($df_S8_m1) 
est sto S8_m1_neg
est resto S8_m1
nlcom _b[mean_speiy]*speiy_sd, post df($df_S8_m1) 
est sto S8_m1_pos


* S9 temperature and precipitation data

xtreg nmigrq_exbalk_ln nmigrq_exbalk_ln_* stdw10ma_temp  stdw10ma_precip ///
    i.year i.quarter_int if migr100_exbalk==1 & year<=2015, ///
    fe vce(cluster cowc)
est sto S9_m1
global df_S9_m1=e(df_r)

nlcom _b[stdw10ma_temp]*tempy_sd*-1, post df($df_S9_m1) 
est sto S9_m1_temp_neg
est resto S9_m1
nlcom _b[stdw10ma_temp]*tempy_sd, post df($df_S9_m1) 
est sto S9_m1_temp_pos

est resto S9_m1
nlcom _b[stdw10ma_precip]*precipy_sd*-1, post df($df_S9_m1) 
est sto S9_m1_precip_neg
est resto S9_m1
nlcom _b[stdw10ma_precip]*precipy_sd, post df($df_S9_m1) 
est sto S9_m1_precip_pos


**** plot

* collect estimates

global N = 11 /// Number of sensitivity checks ploted 

* negative deviation
matrix d_migr_neg = J($N,3,.)
matrix colnames d_migr_neg = est lb95 ub95 
matrix rownames d_migr_neg = "Main model" "S1 growing season" "S2 migration rate" ///
    "S3 quasi-poisson" "S4 quarterly SPEI" "S5 country-year" "S6 all observations" ///
    "S7 no lag DV" "S8 unweighted SPEI" "S9 temp anomalies" "S9 precip anomalies"

forv i = 1/$N {
   if `i'==1{
        local j = 0
        local model `"tab1_m1_neg"'
    }
    else if `i'==10 {
        local j = 9
        local model `"S9_m1_temp_neg"'
    }
    else if `i'==11 {
        local j = 9
        local model `"S9_m1_precip_neg"'
    }
    else {
        local j = `i'-1
        local model `"S`j'_m1_neg"'
    }
    est resto `model'
    matrix mat_b = e(b)
    matrix mat_V = e(V)
    matrix d_migr_neg[`i',1] = mat_b[1,1]
   if `j'==3{
        *xtpoisson
        matrix d_migr_neg[`i',2] = mat_b[1,1] - (invnormal(.975)*(sqrt(mat_V[1,1])))
        matrix d_migr_neg[`i',3] = mat_b[1,1] + (invnormal(.975)*(sqrt(mat_V[1,1])))
    }
    else {
        *xtreg
        local df = e(df_r)
        matrix d_migr_neg[`i',2] = mat_b[1,1] - (invt(`df',.975)*(sqrt(mat_V[1,1])))
        matrix d_migr_neg[`i',3] = mat_b[1,1] + (invt(`df',.975)*(sqrt(mat_V[1,1])))
    }
}

* positive deviation
matrix d_migr_pos = J($N,3,.)
matrix colnames d_migr_pos = est lb95 ub95
matrix rownames d_migr_pos = "Main model" "S1 growing season" "S2 migration rate" ///
    "S3 quasi-poisson" "S4 quarterly SPEI" "S5 country-year" "S6 all observations" ///
    "S7 no lag DV" "S8 unweighted SPEI" "S9 temp anomalies" "S9 precip anomalies"

forv i = 1/$N {
   if `i'==1{
        local j = 0
        local model `"tab1_m1_pos"'
    }
    else if `i'==10 {
        local j = 9
        local model `"S9_m1_temp_pos"'
    }
    else if `i'==11 {
        local j = 9
        local model `"S9_m1_precip_pos"'
    }
    else {
        local j = `i'-1
        local model `"S`j'_m1_pos"'
    }
    est resto `model'
    matrix mat_b = e(b)
    matrix mat_V = e(V)
    matrix d_migr_pos[`i',1] = mat_b[1,1]
   if `j'==3{
        *xtpoisson
        matrix d_migr_pos[`i',2] = mat_b[1,1] - (invnormal(.975)*(sqrt(mat_V[1,1])))
        matrix d_migr_pos[`i',3] = mat_b[1,1] + (invnormal(.975)*(sqrt(mat_V[1,1])))
    }
    else {
        *xtreg
        local df = e(df_r)
        matrix d_migr_pos[`i',2] = mat_b[1,1] - (invt(`df',.975)*(sqrt(mat_V[1,1])))
        matrix d_migr_pos[`i',3] = mat_b[1,1] + (invt(`df',.975)*(sqrt(mat_V[1,1])))
    }
}

* plot
coefplot(matrix(d_migr_neg[,1]), ci((2 3)) label(- 1 std deviation) msymbol(th)) ///
    (matrix(d_migr_pos[,1]), ci((2 3)) label(+ 1 std deviation) msymbol(sh)) ///
    , xline(1) eform xscale(range(.75 1.75)) xtick(.5 .75 1 1.25 1.5 1.75) ///
    xlabel(.5 "-50%" .75 " " 1 "0" 1.25 " " 1.5 "+ 50%" 1.75 " ")

